##
## CT -- Creating the Collapsed File
##

rm(list=ls())

# Library
library(readr)
library(dplyr)
library(tidyr)

# Set the Working Directory
setwd("~/Desktop/PolicingTStops/BadApples/Replication/")

# Loading the data. 
load("Data/CT_FullFile.RData")

# Filtering to counties and municipalities. 
ct = ct %>% filter(grepl("CSP",`Department Name`)==F&
                     grepl("MTA",`Department Name`)==F&
                     grepl("Tribal",`Department Name`)==F&
                     grepl("CSU",`Department Name`)==F&
                     grepl("CAPITOL",`Department Name`)==F&
                     grepl("DMV",`Department Name`)==F&
                     grepl("State",`Department Name`)==F&
                     grepl("Yale",`Department Name`)==F&
                     grepl("UCONN",`Department Name`)==F&
                     grepl("CSP",`Department Name`)==F&
                     grepl("State",`Department Name`)==F&
                     grepl("UCONN",`Department Name`)==F&
                     grepl("CSU",`Department Name`)==F&
                     grepl("DMV",`Department Name`)==F&
                     grepl("Yale",`Department Name`)==F&
                     grepl("MTA",`Department Name`)==F&
                     grepl("CAPITOL",`Department Name`)==F)

# Fixing a misname.
ct$`Department Name` = ifelse(ct$`Department Name`=="Mashantucket Pequot Police",
                              "Mashantucket Pequot",ct$`Department Name`)

# Indicators
table(ct$SearchAuthorizationCode)
ct$search.pc = ifelse(ct$SearchAuthorizationCode=="O",1,
                      ifelse(ct$SearchAuthorizationCode=="N",0,NA))
ct$search.c = ifelse(ct$SearchAuthorizationCode=="C",1,
                     ifelse(ct$SearchAuthorizationCode=="N",0,NA))
ct$search.all = ifelse(ct$SearchAuthorizationCode=="C"|ct$SearchAuthorizationCode=="O",1,
                       ifelse(ct$SearchAuthorizationCode=="N",0,NA))
ct$contra.pc = ifelse(ct$search.pc==1,ct$ContrabandIndicator,
                      ifelse(ct$ContrabandIndicator==1,NA,0))
ct$contra.c = ifelse(ct$search.c==1,ct$ContrabandIndicator,
                     ifelse(ct$ContrabandIndicator==1,NA,0))
ct$contra.all = ifelse(ct$search.all==1,ct$ContrabandIndicator,
                       ifelse(ct$ContrabandIndicator==1,NA,0))
ct$stop.pc = ifelse(is.na(ct$search.pc),NA,
                    ifelse(ct$ContrabandIndicator==1&
                             ct$SearchAuthorizationCode=="N",NA,1))
ct$stop.c = ifelse(is.na(ct$search.c),NA,
                   ifelse(ct$ContrabandIndicator==1&
                            ct$SearchAuthorizationCode=="N",NA,1))
ct$stop.all = ifelse(is.na(ct$search.all),NA,
                     ifelse(ct$ContrabandIndicator==1&
                              ct$SearchAuthorizationCode=="N",NA,1))

# Aggregation
ct.agency.all = aggregate(ct[,c("stop.all","search.all","contra.all")],
                          by=list(ct$`Department Name`,ct$race.ethnicity),
                          function(x){sum(x,na.rm=T)})
ct.agency.all$Group.3 = "all"
colnames(ct.agency.all) = c("agency","race","stop","search","contra","type")

ct.agency.pc = aggregate(ct[,c("stop.pc","search.pc","contra.pc")],
                         by=list(ct$`Department Name`,ct$race.ethnicity),
                         function(x){sum(x,na.rm=T)})
ct.agency.pc$Group.3 = "PC"
colnames(ct.agency.pc) = c("agency","race","stop","search","contra","type")

ct.ag2 = bind_rows(ct.agency.all,ct.agency.pc)
colnames(ct.ag2)

summary(ct.ag2[ct.ag2$type=="PC",])
summary(ct.ag2[ct.ag2$type=="all",])

# Saving
save(ct.ag2,file="Data/CT_Agency_Search.RData")


###
### Analysis
###

# Clearing the Space
rm(list=ls())

# Libraries
library(readr)
library(dplyr)
library(tidyr)
library(rstan)
library(ggplot2)

# Function
basic_model_fit = function(data = NULL, 
                           model_file = "Scripts/fasttt-master/stan_models/model_mixture.stan",
                           iter_n=12000,seed.set=2938407,
                           delta=0.9,max_trees=12){
  require(rstan)
  
  # Agency set up for running Stan model. 
  stan_data = list(N = nrow(data),
                   R = length(unique(data$race)),
                   D = length(unique(data$agency)),
                   r = as.integer(as.factor(data$race)),
                   d = as.integer(as.factor(data$agency)),
                   n = data$stop,
                   s = data$search,
                   h = data$contra)
  
  # Running the model. 
  rstan_options(auto_write = TRUE)
  options(mc.cores = 5)
  fit = stan(model_file, data=stan_data, 
             iter=iter_n, 
             chains=5, 
             cores=5, 
             seed = seed.set,
             control = list(adapt_delta = delta, max_treedepth = max_trees, 
                            adapt_engaged = !FALSE))
  
  return(fit)
}

# Load in the collapsed data set. 
load("Data/CT_Agency_Search.RData")

# Cleaning the data set, instituting thresholds, and cropping it. 
ct.ag2.thres = pivot_wider(data=ct.ag2,
                           id_cols=c("agency","type"),
                           names_from="race",
                           values_from = c("stop","search","contra"))
ct.ag2.thres$keep_id = paste(ct.ag2.thres$agency,ct.ag2.thres$type,sep="::")
ct.ag2$keep_id = paste(ct.ag2$agency,ct.ag2$type,sep="::")
ct.keep = ct.ag2.thres$keep_id[which(ct.ag2.thres$stop_Black>49&
                                       ct.ag2.thres$stop_White>49&
                                       ct.ag2.thres$search_Black>0&
                                       ct.ag2.thres$search_White>0)]
ct.ag2$include = ifelse(as.character(ct.ag2$keep_id)%in%ct.keep,1,0)

table(ct.ag2$include)

ct.agency.sm = subset(ct.ag2, ct.ag2$include==1 & ct.ag2$race!="Latinx")

comp.agency = names(table(ct.agency.sm$agency))[table(ct.agency.sm$agency)==4]
comp = ct.agency.sm %>% 
  filter(agency %in%comp.agency)

# Fitting the models,
pc.stan = basic_model_fit(data=comp%>%filter(type=="PC"))
all.stan = basic_model_fit(data=comp%>%filter(type=="all"))

# Diagnostic Plots
set.seed(987)
t_for_trace = sample(1:166,8)
t_names_for_trace = paste("t_i[",t_for_trace,"]",sep="")

png("Figures/CT_Traceplot_Search_PC50.png",1254,769)
traceplot(pc.stan,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/CT_Density_Search_PC50.png",629,592)
plot(pc.stan, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

png("Figures/CT_Traceplot_Search_All50.png",1254,769)
traceplot(all.stan,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/CT_Density_Search_All50.png",629,592)
plot(all.stan, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

# Extracting the summaries
pc.summary = summary(pc.stan)
all.summary = summary(all.stan)

max(pc.summary$summary[,10],na.rm=T)
max(all.summary$summary[,10],na.rm=T)

pc.ti = pc.summary$summary[2:(nrow(comp%>%filter(type=="PC"))+1),]
pc.output = data.frame("fasttt.mean.pc" = pc.ti[,1],
                       "fasttt.lower.pc" = pc.ti[,1]-
                         1.96*pc.ti[,2],
                       "fasttt.upper.pc" = pc.ti[,1]-
                         1.96*pc.ti[,2])

all.ti = all.summary$summary[2:(nrow(comp%>%filter(type=="all"))+1),]
all.output = data.frame("fasttt.mean.all" = all.ti[,1],
                      "fasttt.lower.all" = all.ti[,1]-
                        1.96*all.ti[,2],
                      "fasttt.upper.all" = all.ti[,1]-
                        1.96*all.ti[,2])

ct.search = cbind(comp[1:172,c("agency","race")],all.output,pc.output)
ct.agency.wide = tidyr::pivot_wider(data=ct.search,
                                    id_cols="agency",
                                    names_from="race",
                                    values_from = colnames(ct.search)[3:8])

ct.agency.wide = ct.agency.wide%>%
  mutate(trr.pc = fasttt.mean.pc_White/fasttt.mean.pc_Black,
         trr.all = fasttt.mean.all_White/fasttt.mean.all_Black)

# Saving the data set.
save(ct.agency.wide,file="Data/CT_SearchPurpose50_All.RData")

t.test(x=ct.agency.wide$trr.all,
       y=ct.agency.wide$trr.pc,
       paired=T)

t.test(ct.agency.wide$fasttt.mean.all_Black,
       ct.agency.wide$fasttt.mean.pc_Black,
       paired = T)

t.test(ct.agency.wide$fasttt.mean.all_White,
       ct.agency.wide$fasttt.mean.pc_White,
       paired = T)

sd(ct.agency.wide$trr.all)
sd(ct.agency.wide$fasttt.mean.all_Black)
sd(ct.agency.wide$fasttt.mean.all_White)
